home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
C/C++ Interactive Reference Guide
/
C-C++ Interactive Reference Guide.iso
/
c_ref
/
csource3
/
157_01
/
tr.c
< prev
Wrap
Text File
|
1987-10-12
|
11KB
|
389 lines
/*
HEADER: CUG
TITLE: TR.C - Translate Characters
VERSION: 1.00
DATE: 05/03/85
DESCRIPTION: "A full implementation of the UNIX 'tr' utility.
It copies input from 'stdin' to 'stdout' with
substitution or deletion of selected characters
that are specified in the command line that
invokes the program."
KEYWORDS: tr, filter, UNIX, translate
SYSTEM: Any
FILENAME: TR.C
WARNINGS: "Any operating system that converts all command
line lowercase characters to uppercase makes this
utility more of a curiosity than a useful tool.
Lowercase characters can only be specified by
their octal equivalents."
CRC: xxxx
SEE-ALSO: None
AUTHORS: Ian Ashdown - byHeart Software
COMPILERS: Any C compiler
REFERENCES: AUTHORS: Bell Telephone Laboratories;
TITLE: UNIX Programmer's Manual Vol. 1, p. 166;
ENDREF
*/
/*-------------------------------------------------------------*/
/* TR.C - Translate Characters
*
* Version 1.00 May 5th, 1985
*
* Copyright 1985: Ian Ashdown
* byHeart Software
* 1089 West 21st Street
* North Vancouver, B.C. V7P 2C2
* Canada
*
* This program may be copied for personal, non-commercial use
* only, provided that the above copyright notice is included in
* all copies of the source code. Copying for any other use
* without previously obtaining the written permission of the
* author is prohibited.
*
* Philanthropical Notes:
*
* Considerable time and effort went into the development of this
* software, which was expressly written for the public domain.
* The author will gladly accept any and all monetary
* contributions for the purpose of continuing such work!
*
* USAGE: tr [-cds] [string_1 [string_2] ]
*
* Notes:
*
* This is a full implementation of the UNIX "tr" utility. It
* copies input from "stdin" to "stdout" with substitution or
* deletion of selected characters. Input characters of
* "string_1" are mapped into the corresponding characters of
* "string_2". When "string_2" is shorter than "string_1", the
* last character of "string_2" is repeatedly mapped into the
* remaining characters of "string_1".
*
* Any combination of the options "-cds" may be used. These
* options are:
*
* -c COMPLEMENT option. All input characters except
* those of "string_1" are translated to the first
* character of "string_2". Any remaining characters
* of "string_2" are ignored.
*
* -d DELETE option. All input characters matching
* those in "string_1" are deleted from the output
* to "stdout". "String_2" is not used.
*
* -s SQUEEZE option. All sequences of repeated
* characters in the input from "stdin" that match
* characters in "string_1" are each converted to a
* single character (their "string_2" translations).
*
* In either string, the notation "x-y" means the range of
* characters from 'x' to 'y' in increasing ASCII order.
*
* The following "\x"-style escape sequences are supported:
*
* \n newline (non-UNIX)
* \t horizontal tab (non-UNIX)
* \b backspace (non-UNIX)
* \r carriage return (non-UNIX)
* \f form feed (non-UNIX)
* \ddd ddd (where 'ddd' is one to three
* octal digits)
* \c c (where 'c' is anything else)
*
* BUGS:
*
* The ASCII NUL character is always deleted from the output - it
* cannot be specified in either string.
*
* Some operating system command processors will not pass
* lowercase characters and certain other reserved characters to
* a program in command-line arguments (e.g. - CP/M). All that
* can be done is to either specify these characters by their
* octal equivalents (e.g. - 'a' is \141, 'z' is \172), or modify
* this program such that it specifically asks for the two
* strings after the program has been invoked (which makes the
* utility incompatible with the UNIX version of "tr").
*/
/*** Definitions ***/
/* Values stored in BOOL flags */
#define TRUE 1
#define FALSE 0
/* Codes passed to error() to select the message that is printed:
 * OPT_ERR - illegal command line option
 * CMD_ERR - illegal command line
 * BSL_ERR - illegal use of the '\' operator
 * DSH_ERR - illegal use of the '-' operator
 */
#define OPT_ERR 0 /* Error codes */
#define CMD_ERR 1
#define BSL_ERR 2
#define DSH_ERR 3
/*** Typedefs ***/
/* BOOL variables hold TRUE or FALSE */
typedef int BOOL; /* Boolean flag */
/*** Include Files ***/
#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
/*** Main Body Of Program ***/
main(argc,argv)
int argc;
char **argv;
{
static char translate[128]; /* Character translation array */
char ch_1, /* String_1 character */
ch_2, /* String_2 character */
low_1 = NULL, /* String_1 low range character */
high_1, /* String_1 high range character */
low_2 = NULL, /* String_2 low range character */
high_2, /* String_2 low range character */
*str_1, /* String_1 pointer */
*str_2, /* String_2 pointer */
*opt_ptr, /* Command line option pointer */
exp_str();
int i, /* Temporary variable */
trans, /* Translation character */
curr, /* Current input character */
prev = NULL; /* Previous input character */
BOOL cflag = FALSE, /* Complement option flag */
dflag = FALSE, /* Delete option flag */
sflag = FALSE, /* Squeeze option flag */
r_1 = FALSE, /* String_1 range flag */
r_2 = FALSE; /* String_2 range flag */
void error();
/* Parse the command line for user-selected options */
while(--argc && (*++argv)[0] == '-')
for(opt_ptr = argv[0]+1; *opt_ptr != '\0'; opt_ptr++)
switch(toupper(*opt_ptr))
{
case 'C': /* Complement flag */
cflag = TRUE;
break;
case 'D': /* Delete flag */
dflag = TRUE;
break;
case 'S': /* Squeeze flag */
sflag = TRUE;
break;
default: /* Illegal command line option */
error(OPT_ERR,NULL);
}
/* Check for valid command line */
if(!((dflag == TRUE && argc) || (argc > 1)))
error(CMD_ERR);
/* Expand the source and translation strings */
str_1 = argv;
str_2 = ++argv;
if(cflag == FALSE) /* Complement option not selected */
{
while(ch_1 = exp_str(str_1,&low_1,&high_1,&r_1))
{
if(curr = exp_str(str_2,&low_2,&high_2,&r_2))
ch_2 = curr;
translate[ch_1] = ch_2;
}
for(i = 1; i < 128; i++)
if(translate[i] == NULL)
translate[i] = i;
}
else /* Complement option selected */
{
while(ch_1 = exp_str(str_1,&low_1,&high_1,&r_1))
translate[ch_1] = ch_1;
ch_2 = exp_str(str_2,&low_2,&high_2,&r_2);
for(i = 1; i < 128; i++)
if(translate[i] == NULL)
translate[i] = ch_2;
}
/* Process the input */
while((curr = getchar()) != EOF)
{
trans = translate[curr];
if(dflag == FALSE) /* Delete option not selected */
{
if(sflag == FALSE) /* Squeeze option not selected */
putchar(trans);
else /* Squeeze option selected */
{
if(curr == trans)
putchar(trans);
else
if(trans != prev)
putchar(trans);
prev = trans;
}
}
else /* Delete option selected */
if(curr == trans)
putchar(trans);
}
}
/*** Functions ***/
/* EXP_STR() - Expand a character string. The arguments passed
 *             are a pointer to a pointer to a character string
 *             ("str"), a pointer to the low value of a character
 *             range ("low"), a pointer to the high value of the
 *             same range ("high"), and a pointer to a boolean
 *             flag ("range") that indicates whether or not a
 *             range is currently being expanded. Each call
 *             returns the next character of "str", or the next
 *             character of a range implicit in "str" that is
 *             being expanded, and advances "*str" past whatever
 *             was consumed. At the end of the string '\0' is
 *             returned and "*str" is left on the terminator.
 *
 *             A '-' with no preceding character is taken
 *             literally. A range that ends with '-' or the end
 *             of the string, or whose end does not follow its
 *             start in increasing order, is reported through
 *             "error" with DSH_ERR.
 *
 *             "exp_str" uses pointers to variables external to
 *             the function rather than internal static variables
 *             so that the calling function can use more than one
 *             set of variables at a time.
 */
char exp_str(str,low,high,range)
char **str,
     *low,
     *high;
BOOL *range;
{
  char curr;
  int literal_sw();
  void error();

  if(*range == FALSE)   /* Not expanding character range */
  {
    switch(curr = *(*str)++)
    {
      case '\0':        /* End of string - back up pointer */
        (*str)--;
        return '\0';
      case '\\':        /* Must be '\x'-style escape sequence */
        curr = literal_sw(str);
        break;
      case '-':         /* Must be character range */
        if(*low == '\0')  /* Nothing before it - literal '-' */
          break;
        *high = *(*str)++;
        if(*high == '-' || *high == '\0')
          error(DSH_ERR);
        if(*high == '\\')
          *high = literal_sw(str);

        /* The expansion below counts upwards from "*low" until
         * it meets "*high", so the end must lie above the start
         * or the count would run through every other code first.
         */

        if((unsigned char)*high <= (unsigned char)*low)
          error(DSH_ERR);
        *range = TRUE;
        break;
      default:
        break;
    }

    if(*range == FALSE) /* Ordinary character */
    {
      *low = curr;      /* Remember it as a possible range start */
      return curr;
    }
  }

  curr = ++(*low);      /* Expanding character range */
  if(curr == *high)     /* Range is complete */
    *range = FALSE;
  return curr;
}
/* LITERAL_SW() - Convert characters following '\' operator to
 *                their equivalents. "buff_ptr" points to the
 *                string pointer, which must already be past the
 *                '\'; it is advanced over the characters that
 *                are consumed. The following escape sequences
 *                are supported:
 *
 *                  \n    newline
 *                  \t    horizontal tab
 *                  \b    backspace
 *                  \r    carriage return
 *                  \f    form feed
 *                  \ddd  ddd (where 'ddd' is one to three
 *                        octal digits)
 *                  \c    c (where 'c' is anything else)
 *
 *                The equivalent character is returned. A digit
 *                that would take the octal value above 0177 is
 *                not consumed. If the '\' is the last character
 *                of the string, "error" is called with BSL_ERR.
 */
int literal_sw(buff_ptr)
char **buff_ptr;
{
  char c;       /* Current input character */
  int value,    /* Octal constant accumulated so far */
      next,     /* Value if one more digit were accepted */
      digits;   /* Number of octal digits consumed */
  void error();

  c = *(*buff_ptr)++;
  switch(c)
  {
    case 'n':   /* Convert to newline */
      return '\n';
    case 't':   /* Convert to horizontal tab */
      return '\t';
    case 'b':   /* Convert to backspace */
      return '\b';
    case 'r':   /* Convert to carriage return */
      return '\r';
    case 'f':   /* Convert to form feed */
      return '\f';
    case '0':   /* Convert to octal constant */
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
      value = c - '0';
      for(digits = 1; digits < 3; digits++)
      {
        c = **buff_ptr;
        if(c < '0' || c > '7')
          break;
        next = (value << 3) + (c - '0');
        if(next > 0177)   /* Leave the digit for the caller */
          break;
        (*buff_ptr)++;
        value = next;
      }
      return value;
    case '\0':  /* Nothing follows the '\' */
      error(BSL_ERR);
      /* FALLTHROUGH - only reached if "error" were to return */
    default:    /* Must be a literal character */
      return c;
  }
}
/* ERROR() - Error reporting procedure. Writes a bell character,
 *           the message selected by the error code "n" (OPT_ERR,
 *           CMD_ERR, BSL_ERR or DSH_ERR) and the usage line to
 *           "stderr", then terminates the program with a failure
 *           status so that the caller of "tr" can detect the
 *           error. Any other value of "n" prints the prefix and
 *           usage line only. This function does not return.
 */
void error(n)
int n;
{
  fputs("\007\nTR: Error - ",stderr);
  switch(n)
  {
    case OPT_ERR:
      fputs("Illegal command line option.",stderr);
      break;
    case CMD_ERR:
      fputs("Illegal command line.",stderr);
      break;
    case BSL_ERR:
      fputs("Illegal use of '\\' operator.",stderr);
      break;
    case DSH_ERR:
      fputs("Illegal use of '-' operator.",stderr);
      break;
    default:
      break;
  }
  fputs("\n\nUsage: tr [-cds] [string_1 [string_2] ]\n",stderr);
  exit(EXIT_FAILURE);
}
/* End of TR.C */
':
num_1 = c - '0';